## In-Line Findings

## install.packages(c("tidyverse"))
## library(tidyverse)
## library(fixest)

## SET WORKING DIRECTORY HERE
## setwd()

## Loading data
## load("dta.RData")
## load("demand.RData")
## load("tiles.RData")

#### Page 2: "neighborhoods in the top terciles of the city’s white population and income distribution
#### receive about 4-6% faster response times — a difference of about 30-48 hours relative to the
#### average wait time — than do neighborhoods in the bottom terciles"

# Log wait time regressed on tercile indicators (race, then income) with
# combined city x month x year fixed effects and `cluster = "geo"`.
race_wait_across <- feols(log_wait_time ~ factor(white_third) |
                            city^open_month^open_year, data = dta,
                          cluster = "geo")
inc_wait_across <- feols(log_wait_time ~ factor(inc_third) |
                           city^open_month^open_year, data = dta,
                         cluster = "geo")

# Coefficient on the tercile level labelled "1" in each model.
# `[[` stops with an error if that coefficient is absent (e.g. renamed or
# dropped), whereas `[` would silently hand back NA and print "NA-NA%".
race_percent <- race_wait_across$coefficients[["factor(white_third)1"]]
inc_percent  <- inc_wait_across$coefficients[["factor(inc_third)1"]]

# Mean wait over all requests (the quoted text reports it in hours).
avg_wait <- mean(dta$wait_time, na.rm = TRUE)

# Coefficient x 100 is reported as the percent gap; coefficient x mean wait
# as the gap in hours. min()/max() order the race and income figures.
cat("neighborhoods in the top terciles of the city’s white population and income distribution receive about ",
    round(min(race_percent, inc_percent) * 100, 1), "-",
    round(max(race_percent, inc_percent) * 100, 1), "% faster response times - a difference of about ",
    round(avg_wait * min(race_percent, inc_percent), 0), "-",
    round(avg_wait * max(race_percent, inc_percent), 0),
    " hours relative to the average wait time - than do neighborhoods in the bottom terciles\n", sep = "")

#### Page 2: "For race in particular, these gaps are even larger when comparing the very top and very bottom 
#### deciles (as large as a 4 day difference relative to the average)"

# Same specification as the tercile model, but with decile indicators.
race_wait_across_decile <- feols(log_wait_time ~ factor(white_decile) |
                                   city^open_month^open_year, data = dta,
                                 cluster = "geo")

# Decile-"1" coefficient scaled by the mean wait (avg_wait, computed earlier
# in this script) and divided by 24 to express the gap in days.
# `[[` errors on a missing coefficient name instead of returning NA.
race_decile_days <-
  race_wait_across_decile$coefficients[["factor(white_decile)1"]] * avg_wait / 24

cat("For race in particular, these gaps are even larger when comparing the very top and very bottom",
    "deciles (as large as a", round(race_decile_days, 0),
    "day difference relative to the average)\n")

## Pages 12 and 13: Median and mean wait time, and mean expected wait time
# `TRUE` is spelled out because `T` is an ordinary variable that can be rebound.
median(dta$wait_time, na.rm = TRUE)
mean(dta$wait_time, na.rm = TRUE)
# New York rows are left out of the expected-time average.
mean(dta$expected_time[dta$city != "New York"], na.rm = TRUE)

## Page 12: Footnote 10, % of NY calls w/ an assigned due date
# All New York requests, then the subset whose expected_time is not missing.
ny <- filter(dta, city == "New York")
ny_yesdue <- filter(ny, !is.na(expected_time))

# Share of New York requests with a non-missing expected_time
# (a proportion between 0 and 1, not multiplied by 100).
nrow(ny_yesdue) / nrow(ny)

## Page 13: Footnote 11, correlation between income and race terciles
# Pearson correlation of the two tercile columns in `tiles`, coerced to
# numeric, using only rows where both are present.
with(
  tiles,
  cor(as.numeric(white_third), as.numeric(inc_third),
      use = "complete.obs", method = "pearson")
)

#### Page 18-19: "service requests from neighborhoods in the bottom tercile of a city’s percent
#### white distribution are responded to 7.5% slower than are requests from the city’s top tercile"

# Reuses race_percent from the tercile regression earlier in this script.
# The trailing "\n" keeps this sentence from running into the next output.
cat("service requests from neighborhoods in the bottom tercile of a city’s percent white distribution are",
    " responded to ", round(race_percent * 100, 1),
    "% slower than are requests from the city’s top tercile\n", sep = "")

#### Page 18-19: "Recall that the mean wait time is 610 hours, or 25 days"

# avg_wait (computed earlier in this script) rounded to whole hours, and the
# same figure divided by 24 for days. The trailing "\n" ends the output line.
cat("Recall that the mean wait time is",
    round(avg_wait, 0), "hours, or", round(avg_wait / 24, 0), "days\n")

#### Page 19: "This suggests that neighborhoods in the bottom tercile of the white population wait just
#### over 46 hours longer — almost 2 days — than do neighborhoods in the top tercile"

# Tercile coefficient times the mean wait gives the gap in hours; the
# "almost 2 days" wording is fixed text, not computed.
# The trailing "\n" ends the output line.
cat("This suggests that neighborhoods in the bottom tercile of the white population wait just over",
    round(race_percent * avg_wait, 0),
    "hours longer - almost 2 days, than do neighborhoods in the top tercile\n")

#### Page 19: "Effects for income are slightly smaller in magnitude"

# Compare the absolute sizes of the race and income tercile coefficients
# (as percents). The relation symbol is derived from the estimates so the
# printed line cannot claim "race > income" when the numbers say otherwise.
race_mag <- abs(race_percent * 100)
inc_mag <- abs(inc_percent * 100)
mag_relation <- if (race_mag > inc_mag) {
  ">"
} else if (race_mag < inc_mag) {
  "<"
} else {
  "="
}
cat(round(race_mag, 1), "(race)", mag_relation, round(inc_mag, 1), "(income)\n")

#### Page 19: "in Washington, DC, calls for alley cleaning overwhelming come from non-white neighborhoods...
#### in April 2018, for instance, the mean wait time for alley cleaning was 351 hours (15 days) while
#### the wait time for parking meter repairs was 208 hours (9 days)"

# Mean wait for each DC service among requests opened in April 2018,
# rounded down to whole hours.
dc_alley <- dta |>
  filter(city_service == "Washington, DC - Alley Cleaning",
         open_month == 4, open_year == 2018) |>
  summarise(avg = mean(wait_time, na.rm = TRUE)) |>
  pull(avg) |>
  floor()

dc_parking <- dta |>
  filter(city_service == "Washington, DC - Parking Meter Repair",
         open_month == 4, open_year == 2018) |>
  summarise(avg = mean(wait_time, na.rm = TRUE)) |>
  pull(avg) |>
  floor()

# The day counts are derived from the hour figures (hours / 24, rounded)
# rather than typed in, so the sentence tracks the data; the unit "hours"
# is printed to match the quoted text.
cat("in Washington, DC, calls for alley cleaning overwhelming come from non-white neighborhoods...",
    "in April 2018, for instance, the mean wait time for alley cleaning was",
    dc_alley, "hours", paste0("(", round(dc_alley / 24), " days)"),
    "while the wait time for parking meter repairs was",
    dc_parking, "hours", paste0("(", round(dc_parking / 24), " days)\n"))

#### Page 19: "Here, expected wait times for requests from neighborhoods in the bottom tercile of race
#### and income are about 21% longer than expected wait times for requests from the top tercile"

# Both models use the same sample (every city except New York), so build it once.
dta_no_ny <- dta %>% filter(city != "New York")

# Log expected time regressed on tercile indicators (race, then income) with
# combined city x month x year fixed effects and `cluster = "geo"`.
race_expected_across <- feols(log_expected_time ~ factor(white_third) |
                                city^open_month^open_year, data = dta_no_ny,
                              cluster = "geo")

inc_expected_across <- feols(log_expected_time ~ factor(inc_third) |
                               city^open_month^open_year, data = dta_no_ny,
                             cluster = "geo")

# `[[` errors on a missing coefficient name instead of silently returning NA.
race_exp_percent <- race_expected_across$coefficients[["factor(white_third)1"]]
inc_exp_percent <- inc_expected_across$coefficients[["factor(inc_third)1"]]

# Average of the two coefficients as a percent, rounded down to a whole number.
exp_wait <- mean(c(race_exp_percent, inc_exp_percent) * 100) |> floor()

cat("Here, expected wait times for requests from neighborhoods in the bottom tercile of race ",
    "and income are about ", exp_wait,
    "% longer than expected wait times for requests from the top tercile\n", sep = "")

#### Page 22: "Here, averaging across service areas, cities respond about 16.4% faster to calls 
#### placed by the top decile relative to calls placed by the bottom decile"

# Decile-"1" coefficient from the decile regression fitted earlier in this
# script. `[[` errors on a missing name instead of silently returning NA.
race_dec_percent <- race_wait_across_decile$coefficients[["factor(white_decile)1"]]

# Coefficient x 100, to one decimal place; the trailing "\n" ends the line.
cat("Here, averaging across service areas, cities respond about ",
    round(race_dec_percent * 100, 1),
    "% faster to calls placed by the top decile relative to calls placed by the bottom decile\n", sep = "")

#### Page 24: "number of unique services that we study — 2,544 in total"

# If you sum up the # of services column in Table 1, you get 2544:
# One row per city: first and last request date, number of requests, and
# number of distinct services.
# NOTE(review): pop_rank is assigned by position, so it relies on summarise()
# returning exactly 13 cities in group_by()'s sorted order. The values are
# presumably population ranks - verify against the city list. Do not switch
# to `.by = city`, which keeps first-appearance order instead.
city_summ <- dta |>
  group_by(city) |>
  summarise(first_request = min(open_date),
            last_request = max(open_date),
            tot_requests = n(),
            tot_services = n_distinct(service)) |>
  mutate(pop_rank = c(11, 30, 24, 9, 19, 4, 2, 29, 21, 1, 6, 17, 23)) |>
  arrange(pop_rank) |>
  select(-pop_rank) |>
  # Display formatting: thousands separators and mm/dd/yyyy dates (as text).
  mutate(tot_requests = format(tot_requests, big.mark = ",", scientific = FALSE),
         first_request = format(as.Date(first_request, format = "%Y-%m-%d"), "%m/%d/%Y"),
         last_request = format(as.Date(last_request, format = "%Y-%m-%d"), "%m/%d/%Y"))

# Total of the per-city distinct-service counts (a service name appearing in
# two cities is counted once per city).
n_service <- sum(city_summ$tot_services)

cat("number of unique services that we study -", n_service, "in total\n")

#### Page 25: "in December 2011, low-income demand was about 22 percentage points higher than demand
#### from high-income neighborhoods (45.28% of calls versus 23.23%). In March 2018, this gap was half as large."

# Demand figures for San Francisco street and sidewalk cleaning in the two
# months quoted in the text.
sf_diff_2011 <- demand |>
  filter(city_service == "San Francisco - Street and Sidewalk Cleaning",
         open_month == 12, open_year == 2011) |>
  select(poor_demand, pct_poor, pct_rich)

sf_diff_2018 <- demand |>
  filter(city_service == "San Francisco - Street and Sidewalk Cleaning",
         open_month == 3, open_year == 2018) |>
  select(poor_demand, pct_poor, pct_rich)

# Label each subset before stacking. Attaching c("12-2011", "03-2018") after
# bind_rows() assumes exactly one row per month: it errors on any other row
# count, or mislabels rows when one month has none and the other has two.
bind_rows(
  mutate(sf_diff_2011, month_year = "12-2011"),
  mutate(sf_diff_2018, month_year = "03-2018")
)
  